

*****************************************************
*citations, generality and originality (Hall, Jaffe, Trajtenberg 2001)
*****************************************************

*generality
* Output: one row per cited patent (stored as patent_id) with
*   ncited       - number of citation records pointing at it
*   ncited_2year - those whose citing patent has an application year no later
*                  than the cited patent's application year + 2
*   generality   - 1 minus the sum of squared citation shares across the
*                  mainclass of the citing patents
use "$rawdatapatent\patent_citation", clear

* Application year of the CITED patent: citation_id temporarily takes the
* patent_id name so it can serve as the merge key.
rename patent_id citing_id
rename citation_id patent_id
merge m:1 patent_id using "$rawdatapatent\patent_application", keep(master match) keepusing(date) nogenerate
gen year = substr(date, 1, 4)
destring year, replace
replace year = . if year > 2018
rename patent_id citation_id
rename citing_id patent_id
drop date

* Application year of the CITING patent (patent_id again holds the citing patent).
merge m:1 patent_id using "$rawdatapatent\patent_application", keep(master match) keepusing(date) nogenerate
gen cit_year = substr(date, 1, 4)
destring cit_year, replace
replace cit_year = . if cit_year > 2018

* Class of the citing patent: sequence is overwritten with 0 so the merge picks
* the sequence==0 row of uspc_current.
* NOTE(review): citing patents without a class match keep a missing mainclass and
* are treated as one separate class in the shares below - confirm this is intended.
replace sequence = 0
merge m:1 patent_id sequence using "$rawdatapatent\uspc_current", keep(master match) nogenerate

gen cit = 1
* A missing cit_year compares as larger than any number, so it never counts here;
* a missing cited-patent year is excluded explicitly.
gen cit_2year = (year != . & cit_year <= year + 2)

* Re-key on the cited patent and count citations per (cited patent, citing class).
drop patent_id
rename citation_id patent_id
collapse (sum) cit cit_2year, by(patent_id mainclass)

* collapse leaves the data sorted by patent_id mainclass, which the by-prefix relies on.
by patent_id: egen ncited = total(cit)
by patent_id: egen ncited_2year = total(cit_2year)
gen share = cit / ncited
gen sharesq = share^2
by patent_id: egen hhi = total(sharesq)
gen generality = 1 - hhi

* All three variables are constant within patent_id, so the mean just keeps one row each.
collapse (mean) ncited ncited_2year generality, by(patent_id)
save "$data\patent_citation", replace

*originality
* Output: per citing patent (patent_id),
*   ncitation   - number of citation records it makes
*   originality - 1 minus the sum of squared citation shares across the
*                 mainclass of the patents it cites
* The result is merged into, and overwrites, "$data\patent_citation".
use "$rawdatapatent\patent_citation", clear

* sequence is forced to 0 so the class merge uses the sequence==0 row of uspc_current.
replace sequence = 0

* The class is looked up for the CITED patent, so citation_id temporarily takes
* the patent_id name; the citing id is parked in temp.
rename patent_id temp
rename citation_id patent_id
merge m:1 patent_id sequence using "$rawdatapatent\uspc_current", keep(master match) nogenerate
drop patent_id
rename temp patent_id

* Count citations per (citing patent, cited class).
gen cit = 1
collapse (sum) cit, by(patent_id mainclass)

* collapse leaves the data sorted by patent_id mainclass, which the by-prefix relies on.
by patent_id: egen ncitation = total(cit)
gen share = cit / ncitation
gen sharesq = share^2
by patent_id: egen hhi = total(sharesq)
gen originality = 1 - hhi

* Both variables are constant within patent_id; the mean keeps one row per patent.
collapse (mean) originality ncitation, by(patent_id)

* No keep() option: patents found in only one of the two files are retained.
merge 1:1 patent_id using "$data\patent_citation", nogenerate
save "$data\patent_citation", replace


*****************************************************
*Match patent to UMETRICS
*****************************************************

* Start from the patent-UMETRICS crosswalk and attach grant and application dates.
use "$rawdatapatent\patent_umetrics_crosswalk", clear

* Grant date: the patent file is merged on number, so patent_number is renamed to match.
rename patent_number number
merge m:1 number using "$rawdatapatent\patent", keep(master match) keepusing(date) nogenerate
* Characters 1-4 of date are taken as the year and characters 6-7 as the month.
gen grant_year = substr(date, 1, 4)
gen grant_month = substr(date, 6, 2)
destring grant_year grant_month, replace
drop date

* Application date: same extraction, this time from patent_application keyed on patent_id.
* The application date string itself stays in the saved file.
rename number patent_id
merge m:1 patent_id using "$rawdatapatent\patent_application", keep(master match) keepusing(date) nogenerate
gen app_year = substr(date, 1, 4)
gen app_month = substr(date, 6, 2)
destring app_year app_month, replace

save "$data\patent_umetrics", replace

*add assignee information
* Builds "$temp\assigneetype" (one row per patent_id with assignee_type and
* organization) and merges it into "$data\patent_umetrics".

use "$rawdatapatent\patent_assignee_crosswalk", clear
rename assignee_id id
merge m:1 id using "$rawdatapatent\assignee", keep(master match) nogenerate
drop if id == ""

* A type of "0" is blanked out so it becomes missing once the string is converted.
replace type = "" if type == "0"
destring type, replace

* Patent-level type = smallest non-missing type code among the patent's assignees.
rename type temp
bysort patent_id: egen type = min(temp)

* NOTE(review): (first) takes whichever organization row comes first within
* patent_id after the sort above; with several assignees per patent that order
* is not pinned down by any explicit sort key - confirm this is acceptable.
collapse (mean) type (first) organization, by(patent_id)
rename type assignee_type
save "$temp\assigneetype", replace

* Attach the patent-level assignee information; patents without an assignee row are kept.
use "$data\patent_umetrics", clear
merge m:1 patent_id using "$temp\assigneetype", keep(master match) nogenerate
save "$data\patent_umetrics", replace


*merge with Tania's data
* Adds the variables of Patents_Tania_v2 (matched on a numeric patent number) and
* of basic_patent_class (matched on cpc_class) to the patent-level file.
use "$data\patent_umetrics", clear

* Numeric copy of patent_id for the merge key; with force, ids that contain
* non-numeric characters become missing instead of stopping the conversion.
gen patnum = patent_id
destring patnum, replace force

merge m:1 patnum using "N:\GrpHOBAHE\approved_import_requests\Alex_2021-05-27\Patents_Tania_v2", keep(master match) nogenerate
drop no_cites fyear

* Shorter names used by the rest of the script.
rename cit_received ncited
rename scaled_cit_rec_f scaled1
rename scaled_cit_rec_t scaled2

merge m:1 cpc_class using "$data\basic_patent_class", keep(master match) nogenerate
compress
save "$data\patent_umetrics", replace

*merge with closeness to science data
* Adds five variables from patent_use_of_science.dta (artci_q4 and four jif_*
* measures); patents without a match are kept with missing values.
use "$data\patent_umetrics", clear

* The using file is keyed on patent, so a temporary copy of patent_id is made under that name.
gen patent = patent_id
merge m:1 patent using "N:\GrpHOBAHE\approved_import_requests\Alex_2020-07-27\patent_use_of_science.dta", keepusing(artci_q4 jif_avg_all jif_med_all jif_avg_q4 jif_tot_all) keep(master match) nogenerate
drop patent
compress
save "$data\patent_umetrics", replace

*inventor data
* Collapses the patent-level file to one row per iris x application year and
* saves it as "$data\inventor_umetrics_byyear" (app_year is renamed year).
* Every npatent_* variable is a 0/1 patent-level indicator that is summed, so the
* output holds patent counts per category; ncited* and scaled* are summed as well,
* and average_citation*, originality and generality are averaged.
* The numeric cutoffs below are hard-coded; their derivation is not shown in this file.

use "$data\patent_umetrics", clear

* The same assignee file was already merged into patent_umetrics when it was built
* earlier in this script; variables already present in the master are left unchanged.
merge m:1 patent_id using "$temp\assigneetype", keep(1 3) nogen

* NOTE(review): the assignee step of this script creates a numeric variable named
* assignee_type, whereas this line reads a string variable assigneetype. It
* presumably comes from one of the externally merged files - confirm it exists.
gen priv_assignee=(assigneetype=="Forprofit")

gen npatent=1

* Stata treats missing as larger than any number, so an unguarded > or >= would
* flag every patent with missing citation data (patents unmatched in the citation
* merge are kept) as cited / highly cited. Each comparison is therefore guarded,
* in line with the other indicators in this block.
gen npatent_poscites=(ncited>=1 & !missing(ncited))
gen npatent_highcites=((scaled1>3.03 & !missing(scaled1)) | (scaled2>3 & !missing(scaled2)) | (ncited>28 & !missing(ncited)))

* NOTE(review): originality and generality are used here, but no merge of
* "$data\patent_citation" into patent_umetrics is visible in this script -
* confirm where these two variables come from.
gen npatent_highoriginality=(originality>0.32&originality!=.)
gen npatent_highgenerality=(generality>0.177&generality!=.)
gen npatent_basic=basic_patent_class

* Mutually exclusive bins of scaled1; a missing scaled1 falls in none of them.
gen npatent_zerocite=(scaled1==0)
gen npatent_quartile1=(scaled1>0 & scaled1<0.337)
gen npatent_quartile2=(scaled1>=0.337 & scaled1<0.807)
gen npatent_quartile3=(scaled1>=0.807 & scaled1<1.835)
gen npatent_quartile4=(scaled1>=1.835 & scaled1<.)

* Indicators based on the use-of-science variables.
gen npatent_artq4=(artci_q4>18&artci_q4!=.)
gen npatent_jif=(jif_avg_all>2.9&jif_avg_all!=.)
gen npatent_totjif=(jif_tot_all>53&jif_tot_all!=.)
gen npatent_totjif2=(jif_tot_all>194&jif_tot_all!=.)
gen npatent_totjif3=(jif_tot_all>450&jif_tot_all!=.)
gen npatent_privassignee=priv_assignee

* Copies that are averaged (not summed) in the collapse below.
gen average_citation=ncited
gen average_citation_scale=scaled1

* Rows with a missing app_year form their own group in the output.
collapse (sum) npatent* ncited* scaled* (mean) average_citation* originality generality, by(iris app_year)
ren app_year year
save "$data\inventor_umetrics_byyear", replace



